import random
from datetime import datetime

from faker import Faker


def generate_user_info_data(num_records=2000, seed=42):
    """Generate fake rows for the ``user_info`` table.

    Args:
        num_records: Number of rows to generate.
        seed: Seed applied to Faker and to the salary random generator.
            Defaults to 42; pass ``None`` to seed from system entropy.

    Returns:
        A list with ``num_records`` rows. Each row is a list of 14 values,
        in order: sequential id (starting at 1), ID number, user name,
        real name, password, phone number, email, age, sex, address,
        hire date (as a string), salary, job and company.

    Note:
        Birth dates and hire dates are drawn relative to the current date,
        so the output for a given seed can differ from one day to the next.
    """
    fake_cn = Faker('zh_CN')
    Faker.seed(seed)  # seeds Faker's shared random generator
    # Salaries are not produced by Faker, so they need their own seeded
    # generator; the global ``random`` module is never seeded here and would
    # make the salary column differ on every run.
    salary_rng = random.Random(seed)
    today = datetime.now().date()

    data = []

    for record_id in range(1, num_records + 1):
        # Basic profile. The age is one less than the plain year difference
        # when this year's birthday has not happened yet.
        birth_date = fake_cn.date_of_birth(minimum_age=18, maximum_age=60)
        had_birthday = (today.month, today.day) >= (birth_date.month, birth_date.day)
        age = today.year - birth_date.year - (0 if had_birthday else 1)
        sex = fake_cn.random_element(['男', '女'])

        # Pick a name that matches the generated sex.
        if sex == '男':
            real_name = fake_cn.name_male()
        else:
            real_name = fake_cn.name_female()

        # Hire date within the last 10 years.
        # NOTE(review): this is drawn independently of the age, so a young
        # employee can end up with a hire date from before adulthood.
        hire_date = fake_cn.date_between(start_date='-10y', end_date='today')

        # Salary grows with work experience, assuming work starts at age 22,
        # and is then spread between 80% and 150% of that base.
        work_experience = max(0, age - 22)
        base_salary = 3000 + work_experience * 500
        sal = round(salary_rng.uniform(base_salary * 0.8, base_salary * 1.5), 2)

        record = [
            record_id,
            fake_cn.ssn(),
            f"user_{fake_cn.user_name()}_{record_id}",
            real_name,
            fake_cn.password(length=10),
            fake_cn.phone_number()[:12],  # at most 12 characters
            fake_cn.email(),
            age,
            sex,
            fake_cn.address().replace('\n', ' ')[:200],  # at most 200 characters
            str(hire_date),
            sal,
            fake_cn.job()[:100],  # at most 100 characters
            fake_cn.company()[:100],  # at most 100 characters
        ]
        data.append(record)
    return data



